package hso.webmagic;

import us.codecraft.webmagic.Site;

/**
 * Base class for crawler configurations built on WebMagic.
 *
 * <p>Holds the shared {@link Site} settings (retry count, sleep time, timeout,
 * domain, cookies and request headers) and leaves the crawl entry points
 * to subclasses.
 */
public abstract class WebMagicConfig {

	/** Value sent as the {@code JSESSIONID} cookie; {@code null} when the no-arg constructor is used. */
	private final String cookie;

	/**
	 * Creates a configuration and immediately builds {@link #site} via {@link #setSite()}.
	 *
	 * @param cookie value to register as the {@code JSESSIONID} cookie
	 */
	public WebMagicConfig(String cookie) {
		this.cookie = cookie;
		// NOTE(review): setSite() is overridable, so a subclass override runs here
		// before the subclass's own fields are initialized. Kept as-is so existing
		// subclasses see the same behavior.
		setSite();
	}

	/**
	 * Creates a configuration without a session cookie. {@link #site} is left
	 * unset until {@link #setSite()} is called or a subclass assigns it.
	 */
	public WebMagicConfig() {
		this.cookie = null;
	}

	// Part one: configuration of the target site, including crawl interval, retry count, etc.
	protected Site site;

	/**
	 * Builds a fresh {@link Site} with the fixed retry/sleep/timeout settings,
	 * domain, cookies and browser-like request headers, and stores it in {@link #site}.
	 *
	 * <p>The {@code JSESSIONID} cookie is set from the value passed to the
	 * constructor; if the no-arg constructor was used, that value is {@code null}.
	 */
	public void setSite() {
		this.site = Site.me()
				.setRetryTimes(100)
				.setSleepTime(500)
				.setTimeOut(50000)
				// The domain must be set before adding cookies; otherwise the cookies do not take effect.
				.setDomain("xx.xx.com")
				// Cookie values obtained from a captured browser session.
				.addCookie("FN", "0")
				.addCookie("PN", "0")
				.addCookie("TZ", "R01UKzA4OjAw")
				.addCookie("VM", "0")
				.addCookie("ET", "-1")
				.addCookie("JSESSIONID", cookie)
				// Request headers: some sites inspect them to decide whether a request
				// comes from a browser or from a crawler.
				.addHeader("User-Agent",
						"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36 Core/1.47.516.400 QQBrowser/9.4.8188.400")
				.addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
				.addHeader("Accept-Encoding", "gzip, deflate, sdch")
				.addHeader("Accept-Language", "zh-CN,zh;q=0.8")
				.addHeader("Connection", "keep-alive")
				.addHeader("Referer", "http://xx.com/sso/forum/forum_forumManage.action");
	}

	/** Entry point implemented by subclasses. */
	public abstract void start();

	/**
	 * Entry point implemented by subclasses that takes a time argument.
	 *
	 * @param lasttime presumably a time marker limiting what is processed; its
	 *                 format and meaning are defined by the subclass — TODO confirm
	 */
	public abstract void startByDate(String lasttime);

}
